'''
检查是否存在同名文件
'''
import os

# Directory whose immediate entries are scanned.
# NOTE: this name shadows the builtin `dir`; it is kept unchanged so that
# anything referring to this module-level variable keeps working.
dir = '/mnt/nas/shengjie/datasets/cloth_collar_balanced'

s = set()    # entry names seen so far
count = 0    # number of entries whose name was already present in `s`

# NOTE(review): os.scandir lists a single directory non-recursively; confirm
# whether names can actually repeat there or whether a recursive walk over
# sub-directories was intended.
#
# scandir is used as a context manager so the underlying directory handle is
# closed deterministically, including when the loop exits early via `break`
# (otherwise the un-exhausted iterator is only released by the garbage
# collector and triggers a ResourceWarning).
with os.scandir(dir) as entries:
    for entry in entries:
        filename = entry.name
        if filename not in s:
            s.add(filename)
        else:
            count += 1
            print('重复文件:', filename, end=' ')
            # entry.path is the scanned directory joined with entry.name;
            # the size is reported in KB (bytes / 1024).
            print(os.path.getsize(entry.path) / 1024, 'KB')
        # Stop scanning once more than 1000 repeated names were reported and
        # print how many distinct names had been collected up to that point.
        if count > 1000:
            print()
            print('不重复数量:', len(s))
            break

# Final summary: total number of repeated names encountered.
print('重复数量:', count)
